Packages

pacman::p_load(
  dplyr,
  tidyverse,
  readxl,
  vegan,
  reshape2,
  RColorBrewer,
  data.table,
  grid,
  ggh4x,
  PoweR,
  cowplot,
  ggpubr,
  tibble)
# setwd("D:PhD/01_Data/03_Vitro/02_Identification of glycan utilizing microbe") # Windows
# setwd("/Volumes/Yiming_Wang/PhD/01_Data/02_Probiotics/Model_2") # Mac
# setwd("/Users/godric/Desktop/Fut2 review comments/Revision") #Macbook pro
setwd("/Users/godric/Documents/GitHub/2024_ISME_Fut2_Bifidobacterium/01_Data")

df_all <- read_excel("Model_2_probiotic.xlsx") %>% 
  filter(TissueType != "Faeces") %>% 
  separate(GenotypeSex, c("Genotype","Sex"),sep="_", remove=FALSE) %>% 
  mutate(Genotype=factor(Genotype, levels = c("WT","KO"))) %>% 
  mutate(TissueType=ifelse(TissueType == "ST1_T", "Proximal small intestine",
                           ifelse(TissueType == "ST2_T", "Distal small intestine","Large intestine"))) %>% 
  mutate(TissueType=factor(TissueType, levels=c("Proximal small intestine","Distal small intestine","Large intestine"))) %>% 
  rename(log10_copies_g_tissue = log10_Copies_faeces)


df_all_abxinfantis <- df_all %>% 
  filter(Group == "ABX_PRO")

df_all_infantis <- df_all %>% 
  filter(Group == "PRO")
  
# Separate tissue
## Abx+infantis
df_abxinfantis_ST1 <- df_all_abxinfantis %>% 
  filter(TissueType == "Proximal small intestine")

df_abxinfantis_ST2 <- df_all_abxinfantis %>% 
  filter(TissueType == "Distal small intestine")  
  
df_abxinfantis_LT <- df_all_abxinfantis %>% 
  filter(TissueType == "Large intestine")    

## infantis
df_infantis_ST1 <- df_all_infantis %>% 
  filter(TissueType == "Proximal small intestine")

df_infantis_ST2 <- df_all_infantis %>% 
  filter(TissueType == "Distal small intestine")  
  
df_infantis_LT <- df_all_infantis %>% 
  filter(TissueType == "Large intestine")   

Antibiotic + probiotic group

Stats

# Normality
## D'Agostino-Pearson normality test ("omnibus K2" test) - Parametric data here
  ## Null (H0): The data are normally distributed Alternate/the data are sampled from a Gaussian distribution
  ## The low p-value (<0.05) indicates you reject that null hypothesis and so accept the alternative hypothesis that the data are not sampled from a Gaussian population
  ## https://www.rdocumentation.org/packages/PoweR/versions/1.0.7/topics/statcompute
  ## in this case, all notmally distributed, therefore one-way anova is chosen

## ST1: non-parametric
df_ST1_WT <- df_abxinfantis_ST1 %>% 
  filter(Genotype == "WT")

df_ST1_KO <- df_abxinfantis_ST1 %>% 
  filter(Genotype == "KO")

statcompute(6,df_ST1_WT$`log10_copies_g_tissue`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL) # non-prametric
## $statistic
## [1] NaN
## 
## $pvalue
## [1] NaN
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
statcompute(6,df_ST1_KO$`log10_copies_g_tissue`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] NaN
## 
## $pvalue
## [1] NaN
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
## ST2: non-parametric
df_ST2_WT <- df_abxinfantis_ST2 %>% 
  filter(Genotype == "WT")

df_ST2_KO <- df_abxinfantis_ST2 %>% 
  filter(Genotype == "KO")

statcompute(6,df_ST2_WT$`log10_copies_g_tissue`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL) # non-prametric
## $statistic
## [1] NaN
## 
## $pvalue
## [1] NaN
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
statcompute(6,df_ST2_KO$`log10_copies_g_tissue`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] NaN
## 
## $pvalue
## [1] NaN
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
## LT: : non-parametric
df_LT_WT <- df_abxinfantis_LT %>% 
  filter(Genotype == "WT")

df_LT_KO <- df_abxinfantis_LT %>% 
  filter(Genotype == "KO")

statcompute(6,df_LT_WT$`log10_copies_g_tissue`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL) # non-prametric
## $statistic
## [1] NaN
## 
## $pvalue
## [1] NaN
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
statcompute(6,df_LT_KO$`log10_copies_g_tissue`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] NaN
## 
## $pvalue
## [1] NaN
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
# If the normality results are not consistent for different tisses, I will classify them as non-parametric data for consistency as they are in one figure. In this case, they will be considered as non-parametric data, thus, Mann-whitney tests were employed.

# Summary of data 
## ST1
Summary_ST1 <- df_abxinfantis_ST1 %>% 
  group_by(Genotype) %>% 
  summarise(
    count = n(),
    median= median (log10_copies_g_tissue, na.rm=TRUE),
    IQR = IQR (log10_copies_g_tissue,na.rm=TRUE),
    IQR25 = quantile(log10_copies_g_tissue, 0.25),
    IQR75 = quantile(log10_copies_g_tissue, 0.75)
  )
Summary_ST1
## # A tibble: 2 × 6
##   Genotype count median   IQR IQR25 IQR75
##   <fct>    <int>  <dbl> <dbl> <dbl> <dbl>
## 1 WT           5   3.56 0.509  3.42  3.93
## 2 KO           5   2.58 0      2.58  2.58
## ST2
Summary_ST2 <- df_abxinfantis_ST2 %>% 
  group_by(Genotype) %>% 
  summarise(
    count = n(),
    median= median (log10_copies_g_tissue, na.rm=TRUE),
    IQR = IQR (log10_copies_g_tissue,na.rm=TRUE),
    IQR25 = quantile(log10_copies_g_tissue, 0.25),
    IQR75 = quantile(log10_copies_g_tissue, 0.75)
  )
Summary_ST2
## # A tibble: 2 × 6
##   Genotype count median   IQR IQR25 IQR75
##   <fct>    <int>  <dbl> <dbl> <dbl> <dbl>
## 1 WT           5   5.07  1.62  3.80  5.41
## 2 KO           5   3.42  1.22  2.58  3.80
## LT
Summary_LT <- df_abxinfantis_LT %>% 
  group_by(Genotype) %>% 
  summarise(
    count = n(),
    median= median (log10_copies_g_tissue, na.rm=TRUE),
    IQR = IQR (log10_copies_g_tissue,na.rm=TRUE),
    IQR25 = quantile(log10_copies_g_tissue, 0.25),
    IQR75 = quantile(log10_copies_g_tissue, 0.75)
  )
Summary_LT
## # A tibble: 2 × 6
##   Genotype count median   IQR IQR25 IQR75
##   <fct>    <int>  <dbl> <dbl> <dbl> <dbl>
## 1 WT           5   6.65 0.783  5.99  6.78
## 2 KO           5   2.58 1.33   2.58  3.91
# Mann-whitney test
## ST1
stats_ST1 <- wilcox.test(log10_copies_g_tissue ~ Genotype, data=df_abxinfantis_ST1,
                     paired=FALSE, correct=TRUE, conf.int=TRUE, conf.level=0.95, exact=FALSE)
stats_ST1
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  log10_copies_g_tissue by Genotype
## W = 21, p-value = 0.08467
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -0.6180822  3.0723308
## sample estimates:
## difference in location 
##               0.986382
## ST2
stats_ST2 <- wilcox.test(log10_copies_g_tissue ~ Genotype, data=df_abxinfantis_ST2,
                     paired=FALSE, correct=TRUE, conf.int=TRUE, conf.level=0.95, exact=FALSE)
stats_ST2
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  log10_copies_g_tissue by Genotype
## W = 21, p-value = 0.09369
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -0.2448975  2.8339044
## sample estimates:
## difference in location 
##               1.305224
## LT
stats_LT <- wilcox.test(log10_copies_g_tissue ~ Genotype, data=df_abxinfantis_LT,
                     paired=FALSE, correct=TRUE, conf.int=TRUE, conf.level=0.95, exact=FALSE)
stats_LT
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  log10_copies_g_tissue by Genotype
## W = 25, p-value = 0.01116
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  1.665608 5.888703
## sample estimates:
## difference in location 
##               3.414258

Plot

# Draw bar plot
set.seed(123)

# One figure
figure <- ggbarplot(
  df_all_abxinfantis, x = "TissueType", y = "log10_copies_g_tissue", 
   add = c("median_iqr", "jitter"), 
   add.params = list(shape = "Genotype"),
   fill= "Genotype", palette = c("#807F7F", "#BF504D"),
   position = position_dodge(0.8)
  )+
  labs(x="", y=expression(atop('B. infantis',paste('(log'['10']*'copies/g tissue)'), side=2, line=2)), caption ="")+
  theme(legend.position = "bottom",
        axis.text.x=element_text(colour="black", face="plain", size=14), 
        axis.text.y=element_text(colour="black", face="plain", size=14),
        axis.title.x=element_text(margin = margin(t = 10)),
        axis.title.y=element_text(margin = margin(r = 10), size=16),
        axis.title = element_text(face="plain", size = 10),
        plot.title = element_text(size=16, hjust = 0.5),

        panel.grid = element_blank(),
        panel.border = element_rect(color="black", #element_rect is often used for backgrounds and borders
                                    fill = NA,
                                    size = 1),
        legend.background = element_rect(color="transparent"),
        aspect.ratio = 0.3,
        plot.background = element_rect(fill="transparent", colour = "transparent"))+
    scale_fill_manual(values=alpha(c("#30436d","#654e3a"),0.8))+ #BC3C29B2 and #0072B5B2,"#374E55B2" and "#DF8F44B2"
    coord_cartesian(ylim=c(0,10))+
    scale_y_continuous(breaks = seq(0,10,2)) 

figure

figure <- ggplot(data = df_all_abxinfantis %>% 
                  group_by(TissueType,Genotype) %>% 
                  ungroup(), 
                  aes(x=TissueType, y=log10_copies_g_tissue, color=Genotype))+
          geom_bar(data = df_all_abxinfantis %>% 
                    group_by(TissueType,Genotype) %>% 
                    summarise(mediancopies=median(log10_copies_g_tissue)) %>% 
                    ungroup(), 
                    aes(x=TissueType, y=mediancopies, fill=Genotype),
                    width = 0.5,
                    stat = "identity",
                    color="black",
                    position=position_dodge(width=0.7))+
          geom_jitter(aes(x=TissueType, y=log10_copies_g_tissue),size=2.5,
                      position = position_jitterdodge(jitter.width=0.3,dodge.width=0.7))+
  labs(x="", y=expression(atop(italic("B. infantis"),paste('(log'['10']*'copies/g tissue)'), side=2, line=2)), caption ="")+
  theme(legend.position = "bottom",
        axis.text.x=element_text(colour="black", face="plain", size=18), 
        axis.text.y=element_text(colour="black", face="plain", size=18),
        axis.title.x=element_text(margin = margin(t = 10)),
        axis.title.y=element_text(margin = margin(r = 10), size=18),
        axis.title = element_text(face="plain", size = 10),
        plot.title = element_text(size=16, hjust = 0.5),
        panel.grid = element_blank(),
        panel.border = element_rect(color="black", #element_rect is often used for backgrounds and borders
                                    fill = NA,
                                    size = 1),
        panel.background = element_rect(fill="transparent", colour = "transparent"),
        legend.background = element_rect(color="transparent"),
        aspect.ratio = 0.25,
        plot.background = element_rect(fill="transparent", colour = "transparent"))+
    scale_fill_manual(values=alpha(c("#30436d","#654e3a"),0.8))+ #BC3C29B2 and #0072B5B2,"#374E55B2" and "#DF8F44B2"
    scale_color_manual(values=alpha(c("black","black"),0.6))+
    coord_cartesian(ylim=c(0,12))+
    scale_y_continuous(breaks = seq(0,12,2))
figure

figure2 <- figure + geom_line(data=tibble(x=c(2.8,3.2),y=c(10,10)),
              aes(x=x,y=y),
              inherit.aes=FALSE)+
         geom_line(data=tibble(x=c(1.8,2.2),y=c(6.5,6.5)),
              aes(x=x,y=y),
              inherit.aes=FALSE)+
         geom_line(data=tibble(x=c(0.8,1.2),y=c(7,7)),
              aes(x=x,y=y),
              inherit.aes=FALSE)+
        geom_text(aes(x=3, y=10.5),
             label=expression(paste(italic("P")," = 0.011")), size = 6,
             inherit.aes=FALSE)+
        geom_text(aes(x=2, y=7),
             label=expression(paste(italic("P"), " > 0.05")), size = 6,
             inherit.aes=FALSE)+
        geom_text(aes(x=1, y=7.5),
             label=expression(paste(italic("P"), " > 0.05")), size = 6,
             inherit.aes=FALSE)

print(figure2)

        # geom_text(data=tibble(x=3, y=10.5),
        #      aes(x=x,y=y,label=expression(paste(italic("P")," = 0.011"))), size = 6,
        #      inherit.aes=FALSE)+
        # geom_text(data=tibble(x=2, y=7),
        #      aes(x=x,y=y,label=expression(paste(italic("P"), " > 0.05"))), size = 6,
        #      inherit.aes=FALSE)+
        # geom_text(data=tibble(x=1, y=7.5),
        #      aes(x=x,y=y,label=expression(paste(italic("P"), " > 0.05"))), size = 6,
        #      inherit.aes=FALSE)

Probiotic Group

Stats

# Normality
## D'Agostino-Pearson normality test ("omnibus K2" test) - Parametric data here
  ## Null (H0): The data are normally distributed Alternate/the data are sampled from a Gaussian distribution
  ## The low p-value (<0.05) indicates you reject that null hypothesis and so accept the alternative hypothesis that the data are not sampled from a Gaussian population
  ## https://www.rdocumentation.org/packages/PoweR/versions/1.0.7/topics/statcompute
  ## in this case, all notmally distributed, therefore one-way anova is chosen

## ST1: non-parametric
df_ST1_WT <- df_infantis_ST1 %>% 
  filter(Genotype == "WT")

df_ST1_KO <- df_infantis_ST1 %>% 
  filter(Genotype == "KO")

statcompute(6,df_ST1_WT$`log10_copies_g_tissue`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL) # non-prametric
## $statistic
## [1] NaN
## 
## $pvalue
## [1] NaN
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
statcompute(6,df_ST1_KO$`log10_copies_g_tissue`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] NaN
## 
## $pvalue
## [1] NaN
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
## ST2: non-parametric
df_ST2_WT <- df_infantis_ST2 %>% 
  filter(Genotype == "WT")

df_ST2_KO <- df_infantis_ST2 %>% 
  filter(Genotype == "KO")

statcompute(6,df_ST2_WT$`log10_copies_g_tissue`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL) # non-prametric
## $statistic
## [1] NaN
## 
## $pvalue
## [1] NaN
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
statcompute(6,df_ST2_KO$`log10_copies_g_tissue`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] NaN
## 
## $pvalue
## [1] NaN
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
## LT: : non-parametric
df_LT_WT <- df_infantis_LT %>% 
  filter(Genotype == "WT")

df_LT_KO <- df_infantis_LT %>% 
  filter(Genotype == "KO")

statcompute(6,df_LT_WT$`log10_copies_g_tissue`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL) # non-prametric
## $statistic
## [1] NaN
## 
## $pvalue
## [1] NaN
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
statcompute(6,df_LT_KO$`log10_copies_g_tissue`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] NaN
## 
## $pvalue
## [1] NaN
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
# If the normality results are not consistent for different tisses, I will classify them as non-parametric data for consistency as they are in one figure. In this case, they will be considered as non-parametric data, thus, Mann-whitney tests were employed.

# Summary of data 
## ST1
Summary_ST1 <- df_infantis_ST1 %>% 
  group_by(Genotype) %>% 
  summarise(
    count = n(),
    median= median (log10_copies_g_tissue, na.rm=TRUE),
    IQR = IQR (log10_copies_g_tissue,na.rm=TRUE),
    IQR25 = quantile(log10_copies_g_tissue, 0.25),
    IQR75 = quantile(log10_copies_g_tissue, 0.75)
  )
Summary_ST1
## # A tibble: 2 × 6
##   Genotype count median   IQR IQR25 IQR75
##   <fct>    <int>  <dbl> <dbl> <dbl> <dbl>
## 1 WT           5   2.58     0  2.58  2.58
## 2 KO           5   2.58     0  2.58  2.58
## ST2
Summary_ST2 <- df_infantis_ST2 %>% 
  group_by(Genotype) %>% 
  summarise(
    count = n(),
    median= median (log10_copies_g_tissue, na.rm=TRUE),
    IQR = IQR (log10_copies_g_tissue,na.rm=TRUE),
    IQR25 = quantile(log10_copies_g_tissue, 0.25),
    IQR75 = quantile(log10_copies_g_tissue, 0.75)
  )
Summary_ST2
## # A tibble: 2 × 6
##   Genotype count median   IQR IQR25 IQR75
##   <fct>    <int>  <dbl> <dbl> <dbl> <dbl>
## 1 WT           5   2.58 0.371  2.58  2.95
## 2 KO           5   2.58 0      2.58  2.58
## LT
Summary_LT <- df_infantis_LT %>% 
  group_by(Genotype) %>% 
  summarise(
    count = n(),
    median= median (log10_copies_g_tissue, na.rm=TRUE),
    IQR = IQR (log10_copies_g_tissue,na.rm=TRUE),
    IQR25 = quantile(log10_copies_g_tissue, 0.25),
    IQR75 = quantile(log10_copies_g_tissue, 0.75)
  )
Summary_LT
## # A tibble: 2 × 6
##   Genotype count median   IQR IQR25 IQR75
##   <fct>    <int>  <dbl> <dbl> <dbl> <dbl>
## 1 WT           5   2.58     0  2.58  2.58
## 2 KO           5   2.58     0  2.58  2.58
# Mann-whitney test
## ST1
stats_ST1 <- wilcox.test(log10_copies_g_tissue ~ Genotype, data=df_infantis_ST1,
                     paired=FALSE, correct=TRUE, conf.int=TRUE, conf.level=0.95, exact=FALSE)
stats_ST1
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  log10_copies_g_tissue by Genotype
## W = 15, p-value = 0.4237
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  0.0000000 0.7209326
## sample estimates:
## difference in location 
##                      0
## ST2
stats_ST2 <- wilcox.test(log10_copies_g_tissue ~ Genotype, data=df_infantis_ST2,
                     paired=FALSE, correct=TRUE, conf.int=TRUE, conf.level=0.95, exact=FALSE)
stats_ST2
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  log10_copies_g_tissue by Genotype
## W = 17.5, p-value = 0.1797
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  0.000000 1.250959
## sample estimates:
## difference in location 
##            7.96168e-05
## LT
stats_LT <- wilcox.test(log10_copies_g_tissue ~ Genotype, data=df_infantis_LT,
                     paired=FALSE, correct=TRUE,  conf.level=0.95, exact=FALSE)
stats_LT
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  log10_copies_g_tissue by Genotype
## W = 12.5, p-value = NA
## alternative hypothesis: true location shift is not equal to 0

plot

# Draw bar plot
set.seed(123)

# One figure
figure <- ggbarplot(
  df_all_infantis, x = "TissueType", y = "log10_copies_g_tissue", 
   add = c("median_iqr", "jitter"), 
   add.params = list(shape = "Genotype"),
   fill= "Genotype", palette = c("#807F7F", "#BF504D"),
   position = position_dodge(0.8)
  )+
  labs(x="", y=expression(atop('B. infantis',paste('(log'['10']*'copies/g tissue)'), side=2, line=2)), caption ="")+
  theme(legend.position = "bottom",
        axis.text.x=element_text(colour="black", face="plain", size=14), 
        axis.text.y=element_text(colour="black", face="plain", size=14),
        axis.title.x=element_text(margin = margin(t = 10)),
        axis.title.y=element_text(margin = margin(r = 10), size=16),
        axis.title = element_text(face="plain", size = 10),
        plot.title = element_text(size=16, hjust = 0.5),

        panel.grid = element_blank(),
        panel.border = element_rect(color="black", #element_rect is often used for backgrounds and borders
                                    fill = NA,
                                    size = 1),
        legend.background = element_rect(color="transparent"),
        aspect.ratio = 0.3,
        plot.background = element_rect(fill="transparent", colour = "transparent"))+
    scale_fill_manual(values=alpha(c("#30436d","#654e3a"),0.8))+ #BC3C29B2 and #0072B5B2,"#374E55B2" and "#DF8F44B2"
    coord_cartesian(ylim=c(0,6))+
    scale_y_continuous(breaks = seq(0,6,1)) 

figure

figure <- ggplot(data = df_all_infantis %>% 
                  group_by(TissueType,Genotype) %>% 
                  ungroup(), 
                  aes(x=TissueType, y=log10_copies_g_tissue, color=Genotype))+
          geom_bar(data = df_all_infantis %>% 
                    group_by(TissueType,Genotype) %>% 
                    summarise(mediancopies=median(log10_copies_g_tissue)) %>% 
                    ungroup(), 
                    aes(x=TissueType, y=mediancopies, fill=Genotype),
                    width = 0.5,
                    stat = "identity",
                    color="black",
                    position=position_dodge(width=0.7))+
          geom_jitter(aes(x=TissueType, y=log10_copies_g_tissue),size=2.5,
                      position = position_jitterdodge(jitter.width=0.3,dodge.width=0.7))+
  labs(x="", y=expression(atop(italic("B. infantis"),paste('(log'['10']*'copies/g tissue)'), side=2, line=2)), caption ="")+
  theme(legend.position = "bottom",
        axis.text.x=element_text(colour="black", face="plain", size=18), 
        axis.text.y=element_text(colour="black", face="plain", size=18),
        axis.title.x=element_text(margin = margin(t = 10)),
        axis.title.y=element_text(margin = margin(r = 10), size=18),
        axis.title = element_text(face="plain", size = 10),
        plot.title = element_text(size=16, hjust = 0.5),
        panel.grid = element_blank(),
        panel.border = element_rect(color="black", #element_rect is often used for backgrounds and borders
                                    fill = NA,
                                    size = 1),
        panel.background = element_rect(fill="transparent", colour = "transparent"),
        legend.background = element_rect(color="transparent"),
        aspect.ratio = 0.25,
        plot.background = element_rect(fill="transparent", colour = "transparent"))+
    scale_fill_manual(values=alpha(c("#30436d","#654e3a"),0.8))+ #BC3C29B2 and #0072B5B2,"#374E55B2" and "#DF8F44B2"
    scale_color_manual(values=alpha(c("black","black"),0.6))+
    coord_cartesian(ylim=c(0,7))+
    scale_y_continuous(breaks = seq(0,7,1))
figure

figure2 <- figure + geom_line(data=tibble(x=c(2.8,3.2),y=c(3.5,3.5)),
              aes(x=x,y=y),
              inherit.aes=FALSE)+
         geom_line(data=tibble(x=c(1.8,2.2),y=c(5,5)),
              aes(x=x,y=y),
              inherit.aes=FALSE)+
         geom_line(data=tibble(x=c(0.8,1.2),y=c(4,4)),
              aes(x=x,y=y),
              inherit.aes=FALSE)+
        geom_text(aes(x=3, y=4),
             label=expression(paste(italic("P"), " > 0.05")), size = 6,
             inherit.aes=FALSE)+
        geom_text(aes(x=2, y=5.5),
             label=expression(paste(italic("P"), " > 0.05")), size = 6,
             inherit.aes=FALSE)+
        geom_text(aes(x=1, y=4.5),
             label=expression(paste(italic("P"), " > 0.05")), size = 6,
             inherit.aes=FALSE)

figure2